Contents

0.1 Load Packages

library(DESeq2)
library(tximport)
library(biomaRt)
library(tidyverse)
library(ggplot2)
library(ggrepel)
library(readxl)
library(Rtsne)
library(pheatmap)
library(IHW)
library(Questools)
library(dplyr)
library(grid)
library(gridExtra)
library(jyluMisc)

0.2 bioMart

ensembl.mart <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
annotations <-
getBM(
attributes = c(
'hgnc_symbol',
'refseq_mrna',
'transcript_biotype'
),
mart = ensembl.mart
)
annotations <- annotations %>% filter(hgnc_symbol != "" & refseq_mrna != "")

0.3 Read Counts from Salmon

files <- file.path("../data/alignments/", list.files("../data/alignments"),"quant.sf")
sample.ids <- list.files("../data/alignments") %>% map(function(x) gsub("_sequence", "", strsplit(x, split ="lane8")[[1]][2]))
names(files) <- sample.ids
tx2gene <- annotations %>% dplyr::select(refseq_mrna, hgnc_symbol)
txi.salmon <- tximport(files, type = "salmon", tx2gene = tx2gene, ignoreTxVersion = TRUE)

0.4 DESeq Dataset

sample.table <-
  data.frame(row.names = sample.ids,
  fileName = list.files("../data/alignments/"))
dds <- DESeqDataSetFromTximport(txi.salmon, sample.table, design = ~1)

0.5 Annotating Samples

sample.list <- read_xlsx("../data/metadata/sampleList.xlsx", sheet = 2)
plate <- sample.list$`Plate well number`
id <- sample.list$ID
sample.name<- sample.list$`Sample Name`
cell_line <- sample.name %>% map(function(x) ifelse(length(strsplit(x, split = "_")[[1]]) == 4, paste(strsplit(x, split = "_")[[1]][c(2,3)], collapse = "_"), strsplit(x, split = "_")[[1]][2]))
cell_line <- unlist(cell_line)
treatment <- sample.name %>% map(function(x) ifelse(length(strsplit(x, split = "_")[[1]]) == 4, paste(strsplit(x, split = "_")[[1]][4], collapse = "_"), strsplit(x, split = "_")[[1]][3]))
treatment <- unlist(treatment)
sample.annotations <- data.frame(row.names = id, cell_line = as.character(cell_line), treatment = as.character(treatment), plate = plate)
# write.csv(sample.annotations, file = "../data/metadata/sample_annotations.csv", row.names = T)
colData(dds) <- cbind(colData(dds), sample.annotations[rownames(colData(dds)),])

0.6 Annotating Transcripts

gene.annotations <- annotations[match(rownames(dds), annotations$hgnc_symbol),]
rownames(gene.annotations) <- rownames(dds)
rowData(dds) <- gene.annotations
save(dds, file = "../data/RData/3RNAseq.RData")

0.7 GGM on Expression Matirix

dds <- estimateSizeFactors(dds)
dds <- dds[which(rowSums(counts(dds)) > 50),]
dds.vst <- varianceStabilizingTransformation(dds, blind = TRUE)
exprMat <- assay(dds.vst)
sds <- rowSds(exprMat)
exprMat <- exprMat[order(sds, decreasing = TRUE)[1:5000],]
g <- ggm(data.frame(exprMat), rho = 0.5)
g$network

0.8 PCA on Expression Matirix

pcRes <- prcomp(t(exprMat), scale. = FALSE, center = TRUE)
eigs <- pcRes$sdev^2
eigs <- eigs/sum(eigs)

plotTab <- data.frame(pcRes$x[,c(1,2)])
plotTab$sampleID <- rownames(plotTab)
plotTab$cellLine <- sample.annotations[plotTab$sampleID, "cell_line"]
plotTab$treatment <- sample.annotations[plotTab$sampleID, "treatment"]

ggplot(plotTab, aes(x=PC1, y = PC2, label = sampleID, color = cellLine, shape = treatment)) +
  geom_point() +
  geom_text_repel(aes(PC1,PC2, label = sampleID)) +
  theme_bw()

0.9 t-SNE on Expression Matirix

tsneRun <- function(distMat,perplexity=10,theta=0,max_iter=5000) {
  tsneRes <- Rtsne(distMat, perplexity = perplexity, theta = theta, 
                   max_iter = max_iter, is_distance = TRUE, dims =2)
  tsneRes <- tsneRes$Y
  rownames(tsneRes) <- labels(distMat)
  colnames(tsneRes) <- c("x","y")
  tsneRes
}

distViab <- dist(t(exprMat))

plotTab <- data.frame(tsneRun(distViab, perplexity = 2, max_iter = 10000))

#plot
plotTab$sampleID <- rownames(plotTab)
plotTab$cellLine <- sample.annotations[plotTab$sampleID, "cell_line"]
plotTab$treatment <- sample.annotations[plotTab$sampleID, "treatment"]

ggplot(plotTab, aes(x=x, y = y, label = sampleID, color = cellLine, shape = treatment)) +
  geom_point() +
  geom_text_repel(aes(x,y, label = sampleID)) +
  theme_bw()

0.10 Sample Similarities

colAnno <- data.frame(plotTab[,c("cellLine", "treatment")])
corMat <- cor(exprMat)
pheatmap(corMat, annotation_col = colAnno)

0.11 Differential Expressed Genes between Treatments

dds$id <- unlist(sample.ids)
dds$cell_line <- sample.annotations[dds$id, "cell_line"]
dds$treatment <- sample.annotations[dds$id, "treatment"]
dds$treatment <- droplevels(dds$treatment)
dds$cell_line <- droplevels(dds$cell_line)
design(dds) <- ~ cell_line

ddsCell <- DESeq(dds)
results.cell <- results(ddsCell, contrast = c("cell_line" , "Raji", "Namalwa"))
results.df <- as.data.frame(results.cell)
summary(results.df)
##     baseMean         log2FoldChange          lfcSE       
##  Min.   :     1.21   Min.   :-36.12637   Min.   :0.0645  
##  1st Qu.:    10.25   1st Qu.: -0.69593   1st Qu.:0.2508  
##  Median :    54.29   Median :  0.09153   Median :0.7023  
##  Mean   :   414.27   Mean   : -0.08061   Mean   :1.8143  
##  3rd Qu.:   217.68   3rd Qu.:  0.57359   3rd Qu.:2.0698  
##  Max.   :115330.63   Max.   : 31.50970   Max.   :8.1608  
##                                                          
##       stat               pvalue             padj       
##  Min.   :-27.46395   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.: -1.16058   1st Qu.:0.01613   1st Qu.:0.0252  
##  Median :  0.06312   Median :0.28527   Median :0.3964  
##  Mean   : -0.02558   Mean   :0.37789   Mean   :0.4437  
##  3rd Qu.:  0.97044   3rd Qu.:0.72498   3rd Qu.:0.8615  
##  Max.   : 26.32492   Max.   :0.99987   Max.   :0.9998  
##                                        NA's   :1507
res.05 <- results(ddsCell, alpha = 0.05)
table(res.05$padj < 0.05)
## 
## FALSE  TRUE 
##  6945  3729
resIHW <- results(ddsCell, filterFun=ihw)
summary(resIHW)
## 
## out of 11105 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2127, 19%
## LFC < 0 (down)     : 2031, 18%
## outliers [1]       : 0, 0%
## [1] see 'cooksCutoff' argument of ?results
## see metadata(res)$ihwResult on hypothesis weighting
results.cell <- list()
design(dds) <- ~treatment
print("******")
## [1] "******"
dds.cells <- dds$cell_line %>% map(function(x) dds[,dds$cell_line == x]) %>% map(function(x) x[apply(counts(x), 1, function(x) all(x > 10)),])
results.cell <- dds.cells %>% map(function(x) DESeq(x)) %>% map(function(x) results(x, contrast = c("treatment", "sgTP53", "NT")))
names(results.cell) <- dds$cell_line

0.12 P-Value Distributions for Treatment

plotList <- list()
results.cell %>% map(function(x) tibble(pval = x$pvalue)) %>% map(function(x) ggplot(x, aes(pval)) + geom_histogram(bins = 20, fill = "blue", col="blue", alpha=0.3) +
      theme_bw() + theme(plot.title = element_text(hjust =0.5)) +
      xlab("raw p values")) -> plots
plotList<- 1:length(plots) %>% map(function(x) plots[[x]] + ggtitle(sprintf("sgTP53 vs NT in %s", names(results.cell)[x])))
names(plotList) <- names(results.cell)
plotList
## $BL60

## 
## $Ramos

## 
## $Ramos

## 
## $Namalwa

## 
## $Namalwa

## 
## $Raji

## 
## $Raji

## 
## $BJAB

## 
## $BJAB

## 
## $HBL2

## 
## $Namalwa_Old

## 
## $HBL2

## 
## $Maver1

## 
## $Maver1

## 
## $THP1

## 
## $THP1

## 
## $RPMI8226

## 
## $RPMI8226

## 
## $LY47

## 
## $LY47

## 
## $Seraphine

## 
## $Namalwa_Old

## 
## $Seraphine

## 
## $Raji_Old

## 
## $Raji_Old

## 
## $Ramos_Old

## 
## $Ramos_Old

## 
## $BL60_Old

## 
## $BL60_Old

## 
## $BL60

0.13 Enrichment Analysis for Different Treatments

# createInput <- function(DEres, pCut = 0.05, ifFDR = FALSE, rankBy = "stat") {
#   if (ifFDR) {
#     inputTab <- data.frame(DEres) %>% rownames_to_column(var = "symbol") %>% filter(padj <= pCut)
#   } else {
#     inputTab <- data.frame(DEres) %>% rownames_to_column(var = "symbol") %>%filter(pvalue <= pCut)
#   }
#   
#   inputTab <- arrange(inputTab, pvalue) %>% filter(!duplicated(symbol)) %>% select_("symbol", rankBy) %>% data.frame(stringsAsFactors = FALSE)
#   rownames(inputTab) <- inputTab$symbol
#   inputTab$symbol <- NULL
#   colnames(inputTab) <- "stat"
#   return(inputTab)
# }
# 
# gmts <- list(H=system.file("extdata","h.all.v5.1.symbols.gmt",package="BloodCancerMultiOmics2017"),
#             C6=system.file("extdata","c6.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
#             KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017"))
# 
# 
# results.cell %>% map(function(x) createInput(x, pCut = 0.1, ifFDR = FALSE)) %>% map(function(x) names(gmts) %>% map(function(y) runGSEA(inputTab =x, gmtFile = gmts[[y]], GSAmethod = "page"))) -> p
# 
# p %>% map(function(x) x[[1]]) -> res.Halmark
# p %>% map(function(x) x[[2]]) -> res.Oncogenetic
# p %>% map(function(x) x[[3]]) -> res.KEGG
# res.Halmark
# res.Oncogenetic
# res.KEGG
# enBar.Halmark <- plotEnrichmentBar(res.Halmark, pCut = 0.05, ifFDR = FALSE, setName = "HALLMARK")
# plot(enBar.Halmark[1:20])
# enBar.Oncogenetic <- plotEnrichmentBar(res.Oncogenetic, pCut = 0.05, ifFDR = FALSE, setName = "Oncogenetic")
# plot(enBar.Oncogenetic[1:20])
# enBar.KEGG <- plotEnrichmentBar(res.KEGG, pCut = 0.05, ifFDR = FALSE, setName = "KEGG")
# plot(enBar.KEGG[1:20])

0.14 P-Value Distributions for all Cell Lines

design(dds) <- ~ cell_line + treatment
dds <- DESeq(dds)
res.all <- results(dds, contrast = c("treatment", "sgTP53", "NT"))
plotTab <- tibble(pval = res.all$pvalue)
p <- ggplot(plotTab, aes(pval)) + geom_histogram(bins = 20, fill = "blue", col="blue", alpha=0.3) +
  theme_bw() + theme(plot.title = element_text(hjust =0.5)) +
  ggtitle("sgTP53 vs NT in all cell lines") +
  xlab("raw p values")
p

0.15 Enrichment Analysis for Different Treatments for all Cell Lines

# inputTab <- createInput(res.all, pCut = 0.1, ifFDR = TRUE)
# enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$H, GSAmethod = "page")
# enRes$Name <- gsub("HALLMARK_","", enRes$Name)
# enRes <- list(enRes)
# names(enRes) <- "sgTP53"
# enBar <- plotEnrichmentBar(enRes, pCut = 0.1, ifFDR = FALSE, setName = "HALLMARK")
# plot(enBar)
# inputTab <- createInput(res.all, pCut = 0.1, ifFDR = TRUE)
# enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$C6, GSAmethod = "page")
# enRes$Name <- gsub("Oncogenetic_","", enRes$Name)
# enRes <- list(enRes)
# names(enRes) <- "sgTP53"
# enBar <- plotEnrichmentBar(enRes, pCut = 0.11, ifFDR = FALSE, setName = "Oncogenetic")
# plot(enBar)
# inputTab <- createInput(res.all, pCut = 0.1, ifFDR = TRUE)
# enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$C6, GSAmethod = "page")
# enRes$Name <- gsub("KEGG_","", enRes$Name)
# enRes <- list(enRes)
# names(enRes) <- "sgTP53"
# enBar <- plotEnrichmentBar(enRes, pCut = .1, ifFDR = FALSE, setName = "KEGG")
# plot(enBar)